Library necessary packages
library(KernSmooth)
library(NMF)
library(rsvd)
library(Rtsne)
library(ggplot2)
library(cowplot)
library(sva)
library(igraph)
library(cccd)
library(destiny)
library(stringr)
library(reshape2)
library(formatR)
source('Fxns.R')
Load data
atlas_full_tpm <- load_data("./Extend_data/GSE92332_AtlasFullLength_TPM.txt.gz")
## [1] "File exists!"
## 2017-12-25 03:35:54 INFO: Data dimensions: 20108x1522
atlas_full_tpm <- data.frame(log2(1 + atlas_full_tpm))
Select variables
v = get.variable.genes(atlas_full_tpm, min.cv2 = 100)
## 2017-12-25 03:36:03 INFO: Fitting only the 14586 genes with mean expression > 0.0329260912376799

## 2017-12-25 03:36:04 INFO: Found 5196 variable genes (p<0.05)
var.genes = as.character(rownames(v)[v$p.adj < 0.05])
atlas_full_tpm_sample <- colnames(atlas_full_tpm)
cells.group <- unlist(lapply(atlas_full_tpm_sample, function(x) return(str_split(x,
"_")[[1]][4])))
TSNE,PCA data
pca <- read.table("./Extend_data/AtlasFull_pca_scores.txt")
tsne.rot <- PCA_TSNE.scores(data.tpm = atlas_full_tpm, data.umis = atlas_full_tpm,
var_genes = var.genes, data_name = "./Extend_data/AtlasFull")
## ./Extend_data/AtlasFull_pca_scores.txt exists
## ./Extend_data/AtlasFull_tsne_scores.txt exists
colnames(tsne.rot) <- c("tSNE_1", "tSNE_2")
tsne.rot <- as.data.frame(tsne.rot)
Mark genes
# stem mark genes
stem_mark_genes <- c("Lgr5", "Ascl2", "Slc12a2", "Axin2", "Olfm4", "Gkn3")
Genes_mean_tpm(stem_mark_genes, tpm_data = atlas_full_tpm, tsne_data = tsne.rot,
title = "Stem")

# Cell cycle
cell_cycle_mark_genes <- c("Mki67", "Cdk4", "Mcm5", "Mcm6", "Pcna")
Genes_mean_tpm(cell_cycle_mark_genes, tpm_data = atlas_full_tpm, tsne_data = tsne.rot,
title = "Cell cycle")

# Enterocyte
Enterocyte_mark_genes <- c("Alpi", "Apoa1", "Apoa4", "Fabp1")
Genes_mean_tpm(Enterocyte_mark_genes, tpm_data = atlas_full_tpm, tsne_data = tsne.rot,
title = "Enterocyte")

# Globlet
Globlet_mark_genes <- c("Muc2", "Clca1", "Tff3", "Agr2")
Genes_mean_tpm(Globlet_mark_genes, tpm_data = atlas_full_tpm, tsne_data = tsne.rot,
title = "Globet")

# Paneth
Paneth_mark_genes <- c("Lyz1", "Defa17", "Defa22", "Defa24", "Ang4")
Genes_mean_tpm(Paneth_mark_genes, tpm_data = atlas_full_tpm, tsne_data = tsne.rot,
title = "Paneth")

# Enteroendocrine
Enteroendocrine_mark_genes <- c("Chga", "Chgb", "Tac1", "Tph1", "Neurog3")
Genes_mean_tpm(Enteroendocrine_mark_genes, tpm_data = atlas_full_tpm, tsne_data = tsne.rot,
title = "Enteroendocrine")

# Tuft
Tuft_mark_genes <- c("Dclk1", "Trpm5", "Gfi1b", "Il25")
Genes_mean_tpm(Tuft_mark_genes, tpm_data = atlas_full_tpm, tsne_data = tsne.rot,
title = "Tuft")

# number of detected genes
Count_genes <- function(x) {
count <- 0
for (c in x) {
if (c != 0) {
count <- count + 1
}
}
return(count)
}
Genes_per_cell <- as.numeric(apply(atlas_full_tpm, 2, Count_genes))
ggplot(tsne.rot, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(color = Genes_per_cell)) +
ggtitle("Genes/Cell") + theme(legend.title = element_text("Genes/Cell",
size = 8, color = "blue", face = "bold"), legend.position = "right") + scale_color_gradient2(low = "lightblue",
mid = "green", high = "red")

Figure e
LRRS <- read.table("./Extend_data/LRRS.txt")
LRRS <- as.character(LRRS$V1)
LRRs.ann <- Heatmap_fun(genes = LRRS, tpm.data = atlas_full_tpm, condition = unique(cells.group),
all.condition = cells.group)
## There ara 9 conditions
## Whether creat data accurate 0
NMF::aheatmap(LRRs.ann[[2]], Rowv = NA, Colv = NA, annCol = LRRs.ann[[1]], scale = "none")

atlas_lrrs <- Create_plot_data(genes = LRRS, origin.data = atlas_full_tpm, cell_groups = cells.group,
var_genes = NULL, if.use.var.genes = FALSE)
## 2017-12-25 03:38:03 INFO: Data dimensions: 1522x27
p <- ggplot(atlas_lrrs, aes(x = Groups, y = variable)) + geom_tile(aes(fill = value)) +
scale_fill_continuous(low = "lightblue", high = "red") + ggtitle("LRRS") +
theme_bw()
p + labs(x = "", y = "") + scale_x_discrete(expand = c(0, 0), position = "bottom") +
scale_y_discrete(expand = c(0, 0), position = "right") + theme(axis.text.x = element_text(face = "bold",
color = "blue", size = 8, hjust = 1, angle = 90), axis.text.y = element_text(color = "blue",
size = 6), panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

Figure f
transition_factors <- read.table("./Extend_data/Transition-factors.txt")
transition_factors <- as.character(transition_factors$V1)
TF.ann <- Heatmap_fun(genes = transition_factors, tpm.data = atlas_full_tpm,
condition = unique(cells.group), all.condition = cells.group)
## There ara 9 conditions
## Whether creat data accurate 0
NMF::aheatmap(TF.ann[[2]], Rowv = NA, Colv = NA, annCol = TF.ann[[1]], scale = "none")

atlas_tf <- Create_plot_data(genes = transition_factors, origin.data = atlas_full_tpm,
cell_groups = cells.group, var_genes = NULL, if.use.var.genes = FALSE)
## 2017-12-25 03:38:16 INFO: Data dimensions: 1522x157
p <- ggplot(atlas_tf, aes(x = Groups, y = variable)) + geom_tile(aes(fill = value)) +
scale_fill_continuous(low = "lightblue", high = "red") + ggtitle("LRRS") +
theme_bw()
p + labs(x = "", y = "") + scale_x_discrete(expand = c(0, 0), position = "bottom") +
scale_y_discrete(expand = c(0, 0), position = "right") + theme(axis.text.x = element_text(face = "bold",
color = "blue", size = 8, hjust = 1, angle = 90), axis.text.y = element_text(color = "blue",
size = 2), panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
